import requests
import re

# Scrape the Douban Top 250 movie list (10 pages, 25 entries per page) and
# print the title, year, score and rating count parsed from every entry.

# Loop invariants: built once instead of on every page.

# User-Agent header: the server inspects the client's UA, so a browser-like
# value is sent as a small anti-anti-crawler measure.
headers = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0"
}

# One match per '<div class="item">' entry, with named groups
# name / year / score / num. re.S lets '.' match newlines as well, which is
# needed because each entry spans several lines of HTML.
obj = re.compile(r'<div class="item">.*?<span class="title">(?P<name>.*?)</span>'
                 r'.*?<br>(?P<year>.*?)&nbsp;.*?<span class="rating_num" '
                 r'property="v:average">(?P<score>.*?)</span>.*?'
                 r'<span>(?P<num>.*?)人评价</span>', re.S)

# `page` is the value of the `start` query parameter: 0, 25, ..., 225.
for page in range(0, 250, 25):
    # Must be an f-string: a plain literal would send the text "{page}"
    # verbatim as the `start` value instead of the numeric offset.
    url = f"https://movie.douban.com/top250?start={page}&filter="

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)

    # Decode the body with the encoding detected from its content.
    response.encoding = response.apparent_encoding

    result = obj.finditer(response.text)
    for item in result:
        print(item)
        dic = item.groupdict()
        # Strip surrounding whitespace (spaces, newlines, tabs) from the year.
        dic["year"] = dic["year"].strip()
        print(dic)

# Prints the iterator object left over from the last page (kept from the
# original script's output).
print(result)